package com.momo.demo11_utils;

import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

/**
 * Extracts keywords from log text.
 *
 * <p>The text is tokenized with the IK analyzer, candidate words are filtered
 * by length and counted, then the text is indexed into an in-memory Lucene
 * index and each candidate is scored (relevance score weighted by term
 * frequency). The top-scoring candidates are returned as keywords.
 */
public class KeyWordUtil {

	// Maximum number of keywords returned by getKeyWord.
	private static final int MAX_KEYWORDS = 5;

	/**
	 * Extracts up to five keywords from the given text.
	 *
	 * @param text the text to analyze; {@code null} or empty yields an empty list
	 * @return at most five keywords ordered by descending score (ties broken
	 *         alphabetically); never {@code null}
	 * @throws IOException if tokenization of the text fails
	 */
	public static List<String> getKeyWord(String text) throws IOException {
		List<String> keywords = new ArrayList<String>();
		if (text == null || text.equals("")) {
			return keywords;
		}
		Map<String, Integer> words = countCandidateWords(text);
		if (words.isEmpty()) {
			return keywords;
		}
		Map<String, Float> scores = scoreCandidateWords(text, words);
		for (Entry<String, Float> entry : getSortedHashtableByValue(scores)) {
			keywords.add(entry.getKey());
			if (keywords.size() >= MAX_KEYWORDS) {
				break; // only the top five keywords are wanted
			}
		}
		return keywords;
	}

	/**
	 * Tokenizes the text with the IK analyzer (smart mode) and counts each
	 * candidate word. A word qualifies when it is longer than one character
	 * and its GBK encoding exceeds two bytes (i.e. it contains at least one
	 * multi-byte character).
	 *
	 * @throws IOException if the token stream fails
	 */
	private static Map<String, Integer> countCandidateWords(String text) throws IOException {
		Map<String, Integer> words = new HashMap<String, Integer>();
		StringReader reader = new StringReader(text);
		TokenStream tokenStream = new IKAnalyzer(true).tokenStream("*", reader);
		try {
			// The attribute instance is shared across tokens; fetch it once.
			TermAttribute termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
			while (tokenStream.incrementToken()) {
				String word = termAtt.term();
				if (word.length() > 1 && strlen(word, "GBK") > 2) {
					Integer count = words.get(word);
					words.put(word, count == null ? 1 : count + 1);
				}
			}
		}
		finally {
			tokenStream.close();
			reader.close();
		}
		return words;
	}

	/**
	 * Builds a single-document in-memory index from the text and scores each
	 * candidate word: Lucene relevance score multiplied by its frequency.
	 * Failures are logged and yield a (possibly partial) score map.
	 */
	private static Map<String, Float> scoreCandidateWords(String text, Map<String, Integer> words) {
		Map<String, Float> scores = new HashMap<String, Float>();
		Directory dir = null;
		IndexWriter writer = null;
		IndexSearcher searcher = null;
		try {
			String fieldName = "text";
			dir = new RAMDirectory();
			writer = new IndexWriter(dir, new IKAnalyzer(true), true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
			writer.addDocument(doc);
			writer.close();
			writer = null; // closed successfully; finally must not close it again
			searcher = new IndexSearcher(dir);
			searcher.setSimilarity(new IKSimilarity());
			for (Entry<String, Integer> entry : words.entrySet()) {
				Query query = IKQueryParser.parse(fieldName, entry.getKey());
				TopDocs topDocs = searcher.search(query, 1);
				if (topDocs.totalHits > 0) {
					scores.put(entry.getKey(), topDocs.getMaxScore() * entry.getValue());
				}
			}
		}
		catch (Exception e) {
			e.printStackTrace();
		}
		finally {
			// Null checks: any of these may still be unassigned if an earlier
			// statement threw, and a bare close() here would raise an NPE that
			// masks the original exception.
			if (writer != null) {
				try {
					writer.close();
				}
				catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (searcher != null) {
				try {
					searcher.close();
				}
				catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (dir != null) {
				try {
					dir.close();
				}
				catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return scores;
	}

	/**
	 * Returns the length in bytes of the text in the given character set.
	 *
	 * @param text        the text to measure; {@code null} or empty yields 0
	 * @param charsetName the charset used to encode the text (e.g. "GBK")
	 * @return the encoded byte length, or 0 if the charset is unsupported
	 */
	public static int strlen(String text, String charsetName) {
		if (text == null || text.length() == 0) {
			return 0;
		}
		int length = 0;
		try {
			length = text.getBytes(charsetName).length;
		}
		catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		}
		return length;
	}

	/**
	 * Returns the map's entries sorted by descending value, with ties broken
	 * by ascending key.
	 *
	 * @param h the map to sort; not modified
	 * @return a new array of the map's entries in sorted order
	 */
	@SuppressWarnings("unchecked")
	public static Entry<String, Float>[] getSortedHashtableByValue(Map<String, Float> h) {
		Set<Entry<String, Float>> set = h.entrySet();
		Entry<String, Float>[] entries = set.toArray(new Entry[set.size()]);
		Arrays.sort(entries, new Comparator<Entry<String, Float>>() {
			@Override
			public int compare(Entry<String, Float> entry1, Entry<String, Float> entry2) {
				// Descending by value first.
				int byValue = entry2.getValue().compareTo(entry1.getValue());
				if (byValue != 0) {
					return byValue;
				}
				// Equal values: ascending by key for a deterministic order.
				return entry1.getKey().compareTo(entry2.getKey());
			}
		});
		return entries;
	}

	/**
	 * Joins the elements of an array, the values of a map, or the elements of
	 * a collection into a single string.
	 *
	 * @param data      an {@code Object[]}, {@code Map}, {@code Collection},
	 *                  or any other object; {@code null} yields ""
	 * @param separator the separator placed between elements
	 * @return the joined string; for non-container data, {@code data.toString()}
	 */
	public static String implode(Object data, String separator) {
		if (data == null) {
			return "";
		}
		if (data instanceof Object[]) {
			return join(Arrays.asList((Object[]) data), separator);
		}
		if (data instanceof Map) {
			// Only the map's values are joined, matching the original
			// key-iteration-plus-get behavior.
			return join(((Map<?, ?>) data).values(), separator);
		}
		if (data instanceof Collection) {
			return join((Collection<?>) data, separator);
		}
		return data.toString();
	}

	// Joins the items with the separator between consecutive elements.
	private static String join(Iterable<?> items, String separator) {
		StringBuilder out = new StringBuilder();
		boolean first = true;
		for (Object item : items) {
			if (!first) {
				out.append(separator);
			}
			out.append(item);
			first = false;
		}
		return out.toString();
	}

	/**
	 * Runs the text through the given analyzer and returns all produced terms.
	 *
	 * @param analyzer the analyzer used to tokenize the text
	 * @param text     the text to tokenize
	 * @return the terms in token-stream order
	 * @throws IOException if the token stream fails
	 */
	public static String[] termsFormAnalysis(Analyzer analyzer, String text) throws IOException {
		TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
		List<String> termList = new ArrayList<String>();
		try {
			// The attribute instance is shared across tokens; fetch it once.
			TermAttribute termAttribute = (TermAttribute) stream.getAttribute(TermAttribute.class);
			// Lucene 3.x style: incrementToken() advances to the next term.
			while (stream.incrementToken()) {
				termList.add(termAttribute.term());
			}
		}
		finally {
			stream.close();
		}
		return termList.toArray(new String[termList.size()]);
	}
}
